clear
set more off
cd "D:\data_replication"

use estimation\1_data_format\production_imports.dta, clear

gen ll = length(pc8plus)
gen pc4 = pc8plus
replace pc4 = substr(pc8plus, 1, 4) if ll == 8 
drop ll


// DROP OUTLIERS AND MISSING OBSERVATIONS 
//------------------------------------------------------------------------------

drop if price_pc8plus == .
sort pc8plus
by pc8plus: egen price_mean = mean(price_pc8plus)
by pc8plus: egen price_median = median(price_pc8plus)
gen ratio_mean = price_pc8plus / price_mean 
gen ratio_median = price_pc8plus / price_median 
gen ratio_mean_reversed = price_mean / price_pc8plus  
gen ratio_median_reversed =  price_median / price_pc8plus
keep if ratio_median < 30
keep if ratio_median_reversed < 30
drop price_mean price_median ratio_mean ratio_median ratio_mean_reversed ratio_median_reversed


// Population Partner
merge m:1 year partner using data\population\population_partner.dta
keep if _merge == 3
drop _merge partner_description
rename population population_partner

//GDP per capita Partner
merge m:1 partner year using data\gdp_per_capita\gdp_per_capita_defl.dta
keep if _merge == 3
drop _merge
rename gdp_per_capita gdp_per_capita_partner
rename gdp_per_capita_defl gdp_per_capita_defl_partner

// Distance
merge m:1 declarant partner using data\distance\distance_declarant_partner.dta
keep if _merge == 3
drop _merge

// Developed Country Dummy
merge m:1 partner using data\country_codes\developed_country.dta
keep if _merge == 3
drop _merge

// Exchange Rates
merge m:1 partner year using data\exchange_rates\D_exchange_rate_partner.dta
keep if _merge == 3
drop _merge


// Aggregate 
//------------------------------------------------------------------------------

sort pc4 declarant partner year quarter
by pc4 declarant partner year quarter: egen imports_quantity_pc4 = sum(imports_quantity_pc8plus)
by pc4 declarant partner year quarter: egen imports_value_pc4 = sum(imports_value_pc8plus)
by pc4 declarant partner year quarter: keep if _n == 1
gen price_pc4 = imports_value_pc4 / imports_quantity_pc4


// Create Shares
//------------------------------------------------------------------------------

sort pc4 declarant year quarter
by pc4 declarant year quarter: egen imports_quantity_pc4_total = sum(imports_quantity_pc4)
by pc4 declarant year quarter: egen imports_value_pc4_total = sum(imports_value_pc4)
gen share = imports_quantity_pc4 / imports_quantity_pc4_total
gen exp_share = imports_value_pc4 / imports_value_pc4_total
drop imports_value_pc8plus imports_quantity_pc8plus price_pc8plus 


// Create Hausmann Instruments
//------------------------------------------------------------------------------

// Current Year

sort pc4 year partner declarant
gen price_pc4_weighted = price_pc4 * share
by pc4 year partner: egen price_sum = sum(price_pc4)
by pc4 year partner: egen price_sum_weighted=sum(price_pc4_weighted)
by pc4 year partner: gen number_importers=_N
gen price_sum_ex_own = price_sum - price_pc4
gen price_sum_ex_own_weighted = price_sum_weighted - price_pc4_weighted
by pc4 year partner: egen weight_sum = sum(share)
gen price_ex_own = price_sum_ex_own/(number_importers-1)
gen price_ex_own_weighted = price_sum_ex_own_weighted/(weight_sum-share)
drop if price_ex_own == .
drop if price_ex_own_weighted == .
drop weight_sum price_sum_ex_own_weighted price_sum_ex_own number_importers price_sum_weighted price_sum price_pc4_weighted

// All years

sort pc4 partner declarant
by pc4 partner: egen price_sum = sum(price_pc4)
by pc4 partner: gen number_importers =_N
gen price_sum_ex_own = price_sum - price_pc4
gen price_ex_own_all_years = price_sum_ex_own/(number_importers-1)
drop price_sum number_importers price_sum_ex_own
drop if price_ex_own_all_years == .


// Format
//------------------------------------------------------------------------------

sort pc4 declarant year quarter
by pc4 declarant year quarter: gen products = _N
drop if products == 1

egen product_id = group(pc4)
sort pc4 year quarter declarant share
order year quarter declarant partner share exp_share price_pc4 price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner 

save robustness\PC4\1_data_format\data_base_pc4.dta, replace


forval j = 1/340 {
use robustness\PC4\1_data_format\data_base_pc4.dta, clear
keep if product_id == `j'
keep year quarter declarant partner share exp_share price_pc4 price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner products
egen market_id = group(year quarter declarant)
save robustness\PC4\1_data_format\product_files\data_product_`j'.dta, replace
outsheet using robustness\PC4\1_data_format\product_files\data_product_`j'.csv, comma replace
}


